# SETUP  

# Uncomment the following lines to install packages if they are not already installed
# install.packages("tidyverse")
# install.packages("mediation")
# install.packages("boot")
# install.packages("lsr")
# install.packages("emmeans")
# install.packages("rsq")
# install.packages("car")

# Load libraries
library(tidyverse)   # data manipulation and visualization
library(ggplot2)     # plotting
library(dplyr)       # data manipulation (also in tidyverse, but explicit)
library(car)         # ANOVA and type III sums of squares
library(mediation)   # mediation analysis
library(lsr)         # etaSquared function for effect sizes
library(emmeans)     # estimated marginal means for contrasts
library(rsq)         # partial R2 calculations
library(boot)        # bootstrapping functions

set.seed(123)  # ensures reproducibility for bootstraps and sampling
options(stringsAsFactors = FALSE)   # avoid automatic factor conversion
options(contrasts = c("contr.sum", "contr.poly"))  # sum-to-zero contrasts for Type III ANOVA


# navigating to user's downloads
# if the zip file is not in your downloads folder, set file.path() manually
downloads_dir <- if (.Platform$OS.type == "windows") {
  file.path(Sys.getenv("USERPROFILE"), "Downloads")
} else {
  file.path(Sys.getenv("HOME"), "Downloads")
}

# Getting path to the ZIP file
zip_file_path <- file.path(downloads_dir, "2011-AsymmetricDiscounting-AppeltEtAl.zip")


# unzip 
extract_dir <- tempdir()
unzip(zip_file_path, exdir = extract_dir)

data <- read.csv(file.path(extract_dir,"2011-AsymmetricDiscounting-AppeltEtAl/Asym_Disc_2011_Study1/Asym_Disc_Study1_CLEANPIIremoved.csv"))

View(data) 
numRows <- nrow(data) 
print(numRows)  


# DEMOGRAPHICS 

PercentFemale <- (filter(data, Sex == "Female") %>% nrow() / numRows * 100) %>% round(0) 
print(PercentFemale)  

# remove two errors in birth year
CleanAges <- data %>% filter(!(Year_birth < 1800), !(Year_birth > 2011))
Age <- 2009 - CleanAges$Year_birth
MeanAge <- mean(Age)
print(MeanAge)

StanDevAge <- round(sd(Age), 2) 
print(StanDevAge)  

PercentAtLeastTwoYearDegree <- (data %>% filter(LevelofEducation != "No degree", LevelofEducation != "High school diploma") %>% nrow() / numRows * 100) %>% round(0) 
print(PercentAtLeastTwoYearDegree)  

PercentMarried <- (data %>% filter(Status.1 == "Married") %>% nrow() / numRows * 100) %>% round(0) 
print(PercentMarried)  

PercentHaveChildren <- (data %>% filter(Children > 0) %>% nrow() / numRows * 100) %>% round(0) 
print(PercentHaveChildren)  

# Define the levels in the correct order
income_strings <- c("less than $10,000",
                   "$10,000 - $19,999",
                   "$20,000 - $34,999",
                   "$35,000- $49,999",
                   "$50,000 - $99,999",
                   "$100,000 - $199,999",
                   "greater than $200,000")

# Convert to ordered factor
data$income_factor <- factor(data$LevelofIncome, 
                        levels = income_strings, 
                        ordered = TRUE)

# Convert to rank-ordered numbers
income_numeric <- as.numeric(data$income_factor)


MedianIncome = median(income_numeric, na.rm = TRUE) 
income_strings[MedianIncome] 


# RESULTS   

# Adding Sign and Direction Columns For Readability 
data <- data %>%
  mutate(
    sign = case_when(
      condition %in% c(85, 86) ~ "gain",
      condition %in% c(87, 88) ~ "loss"
    ),
    direction = case_when(
      condition %in% c(85, 87) ~ "delay",
      condition %in% c(86, 88) ~ "acc"))

#check number of rows
nrow(filter(data, sign == "loss"))
nrow(filter(data, sign == "gain"))
nrow(filter(data, direction == "delay"))
nrow(filter(data, direction == "acc"))

nrow(filter(data, sign == "gain", direction == "delay"))
nrow(filter(data, sign == "gain", direction == "acc"))
nrow(filter(data, sign == "loss", direction == "delay"))
nrow(filter(data, sign == "loss", direction == "acc"))


#Impute Switch Points for Those Who Did Not Swap (Switch type is all variable or all fixed)
# 1.Gain Delayed (condition == 85)
# a.switch type all_fixed - 90 is highest amount offered for delay condition 
data$sig_var[which(data$condition == 85 & data$switch_type == "all_fixed")] <- 95 
nrow(filter(data, sig_var == 95))

# b.switch type all var - 40 is lowest amount offered for delay condition   
data$sig_var[which(data$condition == 85 & data$switch_type == "all_var")] <- 40 
nrow(filter(data, condition == 85, sig_var == 40))  

#2 Gain Accelerated (condition == 86) 
# a.switch type all_fixed - 85 is highest amount offered for acc condition 
data$sig_var[which(data$condition == 86 & data$switch_type == "all_fixed")] <- 90 
nrow(filter(data, condition == 86, sig_var == 90)) 

# b.switch type all var - 35 is lowest amount offered for acc condition 
data$sig_var[which(data$condition == 86 & data$switch_type == "all_var")] <- 35
nrow(filter(data, condition == 86, sig_var == 35)) 

#3 Loss Delayed (condition = 87) 
# a.switch type all_fixed -  40 is lowest amount offered for delay condition 
data$sig_var[which(data$condition == 87 & data$switch_type == "all_fixed")] <- 35 
nrow(filter(data, condition == 87, sig_var == 35)) 

# b. switch type all_variable - 90 is highest amount offered for delay condition 
data$sig_var[which(data$condition == 87 & data$switch_type == "all_var")] <- 90 
nrow(filter(data, condition == 87, sig_var == 90))  

#4 Loss Accelerated (condition == 88)
# a.switch type all_fixed -  35 is lowest amount offered for acc condition 
data$sig_var[which(data$condition == 88 & data$switch_type == "all_fixed")] <- 30 
nrow(filter(data, condition == 88, sig_var == 30)) 
# b.switch type all_var - 85 is highest amount offered for acc condition 
data$sig_var[which(data$condition == 88 & data$switch_type == "all_var")] <- 85
nrow(filter(data, condition == 88, sig_var == 90))  


## find indiference point
# the indifference point is the option switched at, averaged with the next available option 
# this is 5 higher or 5 lower depending on whether the condition is gain or loss
gain_conditions <- data$sign == "gain"
data$indif_point[gain_conditions] <- (data$sig_var[gain_conditions] + (data$sig_var[gain_conditions] - 5)) / 2

loss_conditions <- data$sign == "loss"
data$indif_point[loss_conditions] <- (data$sig_var[loss_conditions] + (data$sig_var[loss_conditions] + 5)) / 2


#1.2.1 Asymmetric discounting

# Calculate Discount Rates (k)
data$k <- 0 

# Display k for each category

# Delayed
delay_rows <- data$direction == "delay"
data$k[delay_rows] <- (data$indif_point[delay_rows] - 50) / (50 * 1/4) 
round(mean(data$k[delay_rows]), 2)

# Accelerated
accelerate_rows <- data$direction == "acc"
data$k[accelerate_rows] <- (75 - data$indif_point[accelerate_rows]) / (data$indif_point[accelerate_rows] * 1/4) 
round(mean(data$k[accelerate_rows]), 2)

#Gains
round(mean(filter(data, sign == "gain")$k), 2) 
round(sd(filter(data, sign == "gain")$k), 2)  

#Losses
round(mean(filter(data, sign == "loss")$k), 2) 
round(sd(filter(data, sign == "loss")$k), 2)  

#Delayed gains
round(mean(filter(data, sign == "gain", direction == "delay")$k), 2) 
round(sd(filter(data, sign == "gain", direction == "delay")$k), 2)

#Accelerated gains
round(mean(filter(data, sign == "gain", direction == "acc")$k), 2) 
round(sd(filter(data, sign == "gain", direction == "acc")$k), 2)

#Delayed losses
round(mean(filter(data, sign == "loss", direction == "delay")$k), 2)
round(sd(filter(data, sign == "loss", direction == "delay")$k), 2)

#Accelerated losses
round(mean(filter(data, sign == "loss", direction == "acc")$k), 2)
round(sd(filter(data, sign == "loss", direction == "acc")$k), 2)
 
#ANOVA

#Convert sign and direction to factors
data$sign <- factor(data$sign, levels = c("gain", "loss"))
data$direction <- factor(data$direction, levels = c("delay", "acc"))


#Performing ANOVA with interaction between sign and direction
anova_model <- aov(k ~ sign * direction, data = data)
anova_table <- Anova(anova_model, type = "III")
anova_table

#Compute partial eta squared for each effect
SS_effect <- anova_table$`Sum Sq`[2:4]  # sign, direction, sign:direction
SS_error  <- anova_table$`Sum Sq`[5]    # Residuals
partial_eta2 <- SS_effect / (SS_effect + SS_error)
names(partial_eta2) <- rownames(anova_table)[2:4]
partial_eta2

# Planned contrast tests
em <- emmeans(anova_model, ~ direction | sign)
contrast(em, "pairwise", simple = "each")

#Cohen's d (effect size for contrasts)
cohens_d <- function(x, g) {
  grp1 <- x[g == levels(g)[1]]
  grp2 <- x[g == levels(g)[2]]
  mean_diff <- mean(grp1) - mean(grp2)
  pooled_sd <- sqrt((var(grp1) + var(grp2)) / 2)
  d <- mean_diff / pooled_sd
  return(d)}

# Direction effect for gains
gain_data <- data %>% filter(sign == "gain")
loss_data <- data %>% filter(sign == "loss")
d_gain <- cohens_d(gain_data$k, gain_data$direction)
d_gain

# Direction effect for losses
# put loss first here so don't need to take abs value (?)
loss_data$direction <- factor(loss_data$direction, levels = c("acc", "delay"))
d_loss <- cohens_d(loss_data$k, loss_data$direction)
d_loss

#1.2.2 Clustering and balance of thoughts


# Figure 1: Discounting by sign (gain vs. loss) and by direction(delay vs. acceleration) in Study 1.
plot_data <- data %>%
  group_by(sign, direction) %>%
  summarise(
    mean_k = mean(k, na.rm = TRUE),
    se = sd(k, na.rm = TRUE) / sqrt(n())
  ) %>%
  ungroup()

plot_data$direction <- factor(plot_data$direction,
                              levels = c("delay", "acc"),
                              labels = c("Delay", "Acceleration"))


ggplot(plot_data, aes(x = sign, y = mean_k, fill = direction)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = mean_k - se, ymax = mean_k + se),
                position = position_dodge(0.9), width = 0.2) +
  scale_y_continuous(breaks = seq(-0.4, 1.6, by = 0.4)) +
  labs(x = "Sign", y = "k", fill = "Direction")


#Table 2

# Notes
# N for loss delay and loss acc in the paper was swapped in the figure, but this doesn't impact analysi
# The range for loss acc says 3.6 in the figure in the paper, but should be 5.8

table2 <- data %>%
  group_by(sign, direction) %>%
  summarise(
    N = n(),
    M = round(mean(k, na.rm = TRUE), 2),
    SD = round(sd(k, na.rm = TRUE), 2),
    Range = round(max(k, na.rm = TRUE) - min(k, na.rm = TRUE), 2)
  ) %>%
  ungroup() %>%
  mutate(
    Condition = case_when(
      sign == "gain" & direction == "delay" ~ "Gain delay",
      sign == "gain" & direction == "acc"   ~ "Gain acceleration",
      sign == "loss" & direction == "delay" ~ "Loss delay",
      sign == "loss" & direction == "acc"   ~ "Loss acceleration"
    )
  ) %>%
  select(Condition, N, M, SD, Range)

table2

# range of number of thoughts listed
range(data$num_aspects)
# mean number of thoughts listed
mean(data$num_aspects)
# SD of number of thoughts listed
sd(data$num_aspects)


#need to import list of thoughts by participant so can see the ordering of thoughts to determine their rank
thoughts_listed <- read.csv(file.path(extract_dir,
                                      "2011-AsymmetricDiscounting-AppeltEtAl/Asym_Disc_2011_Study1/Raw Data Hashed/Aspects_Listed.csv"))

#only look at thoughts for the 607 participants analyzed in the main file
thoughts_listed_filtered <- thoughts_listed %>%
  filter(serial %in% data$SerialID)

thoughts_listed_filtered %>% summarise(unique_serials = n_distinct(serial))
li
nrow(thoughts_listed_filtered) #3532 rows across the 607 participants


thoughts_favor <- thoughts_listed_filtered %>%
  filter(category_text %in% c(
    "favors having the gift certificate now",
    "favors paying the fine right away",
    "favors having the gift certificate later",
    "favors paying the fine later"
  )) %>%
  mutate(favor_type = case_when(
    category_text %in% c(
      "favors having the gift certificate now",
      "favors paying the fine right away"
    ) ~ "now",
    category_text %in% c(
      "favors having the gift certificate later",
      "favors paying the fine later"
    ) ~ "later"
  ))

# rank - each aspect_id for each serial id (serial column) position by timestamp (when_added )
# this should be aspect_id 
thoughts_ranked <- thoughts_favor %>%
  group_by(serial) %>%
  arrange(aspect_id, .by_group = TRUE) %>%   # ensure sorted by aspect_id inside each serial
  mutate(rank_order = min_rank(aspect_id)) %>%     # lowest aspect_id = rank 1
  ungroup()

total_aspects_per_serial <- thoughts_listed_filtered %>%
  group_by(serial) %>%
  summarise(total_aspects = n_distinct(aspect_id))


#If no now thoughts were listed, then the mean rank of now thoughts is one larger than the number of thoughts
favor_medians <- thoughts_ranked %>%
  group_by(serial) %>%
  summarise(
    n_now   = sum(favor_type == "now"),
    n_later = sum(favor_type == "later"),
    mr_now  = median(rank_order[favor_type == "now"], na.rm = TRUE),
    mr_later = median(rank_order[favor_type == "later"], na.rm = TRUE)
  ) %>%
  mutate(
    # if one of now or later was 0, then the denominator should be num thoughts favoring + 1
    asp_num_favor = ifelse(
      n_now == 0 | n_later == 0,
      n_now + n_later + 1,
      n_now + n_later
    ),
    
    # if the number of thoughts listed was 0 for favors now, then the rank of now thoughts is one higher than the number of thoughts listed
    # and vice versa for later thoughts
    mr_now   = ifelse(n_now == 0, asp_num_favor, mr_now),
    mr_later = ifelse(n_later == 0, asp_num_favor, mr_later),
    
    # compute SMRD
    SMRD = 2 * (mr_later - mr_now) / asp_num_favor
  )


favor_medians <- favor_medians %>% mutate(SMRD = round(SMRD, 2))


data2 <- left_join(data, favor_medians %>%  select(serial, SMRD, n_now, n_later, asp_num_favor, mr_now, mr_later),
    by = c("SerialID" = "serial"))

data2 <- data2 %>%
  filter(!is.na(SMRD))

mean(data2$SMRD)

# Calculate mean SMRD by condition
smrd_by_condition <- data2 %>%
  group_by(direction) %>% 
  summarise(
    mean_SMDR = mean(SMRD, na.rm = TRUE),
    sd_SMDR = sd(SMRD, na.rm = TRUE),
    n = n()
  )

smrd_by_condition

#t-test
t_test_result <- t.test(SMRD ~ direction, data = data2, var.equal = TRUE)
t_test_result

#Cohen's d
d_smrd <- cohens_d(data2$SMRD, data2$direction)
d_smrd

data2$rel_now <- data2$n_now - data2$n_later
d_relnow <- cohens_d(data2$rel_now, data2$direction)
d_relnow


#relative number of now thoughts
mean(data2$n_now - data2$n_later)

relativeNowThoughts <- data2 %>%
  group_by(direction) %>% 
  summarise(
    relativeNowThoughts = mean(rel_now),
    sdrelativeNowThoughts = sd(rel_now)
  )
relativeNowThoughts

#mediation analysis
#direction -> SMRD a
#SMRD-> now thoughts control for direction b
#direction -> now thoughts c
#direction -> now thoughts control for smrd c'


data2$rel_now_z <- (data2$rel_now - mean(data2$rel_now, na.rm = TRUE)) / sd(data2$rel_now, na.rm = TRUE)
data2$SMRD_z     <- round((data2$SMRD - mean(data2$SMRD, na.rm = TRUE)) / sd(data2$SMRD, na.rm = TRUE), 2)
data2$direction <- factor(data2$direction, levels = c("acc", "delay"))

# Step 1: rel_now_z ~ direction
step1 <- lm(rel_now_z ~ direction, data = data2)
summary(step1)
# Partial R2 for direction
r2_step1 <- summary(step1)$r.squared  # since only one predictor, R2 = partial R2
r2_step1

# Step 2: SMRD ~ direction
step2 <- lm(SMRD ~ direction, data = data2)
summary(step2)
# Partial R2 for direction
r2_step2 <- summary(step2)$r.squared
r2_step2

step3 <- lm(rel_now_z ~ direction + SMRD, data = data2)
summary(step3)
# Partial R2 for each predictor in step 3
install.packages("rsq") 
# rsq.partial calculates partial R2
partial_rsq <- rsq.partial(step3)
# convert to regular decimal notation
data.frame(
  Predictor = partial_rsq$variable,
  Partial_R2 = round(partial_rsq$partial.rsq, 3)
)

#bootstrap tests
# Bootstrapping the indirect effect
# Function to calculate a, b, ab, c, c'
med_fun <- function(data, idx) {
  d <- data[idx, ]
  fit_c  <- lm(rel_now_z ~ direction, data = d)       # c
  fit_a  <- lm(SMRD ~ direction, data = d)           # a
  fit_b  <- lm(rel_now_z ~ direction + SMRD, data = d) # b & c'
  a <- coef(fit_a)["direction1"]
  b <- coef(fit_b)["SMRD"]
  c_val <- coef(fit_c)["direction1"]
  c_prime <- coef(fit_b)["direction1"]
  ab <- a * b
  return(c(a = a, b = b, ab = ab, c = c_val, c_prime = c_prime))
}


# Run bootstrap with 1000 replications
boot_results <- boot(data = data2, statistic = med_fun, R = 100)

# Original coefficients
coef_table <- data.frame(Coefficient = names(boot_results$t0), Value = boot_results$t0)
coef_table

# Bootstrap confidence intervals for ab (indirect effect)
#figure out how to set to 99
boot.ci(boot_results, conf = 0.999, type = c("perc", "bca"), index = 3)  # index=3 is ab

#1.2.3

# prominence of now thoughts
# av(zscoreSMRD, relativeNumNowThoughts) for each participant
# Compute prominence as the average of these two z-scores
data2$sign <- factor(data2$sign, levels = c("gain", "loss"))
data2$direction <- factor(data2$direction, levels = c("delay", "acc"))

data2$prominence_now <- rowMeans(data2[, c("SMRD_z", "rel_now_z")], na.rm = TRUE)
summary(data2$prominence_now)

# Means and SDs by sign
prominence_by_sign <- data2 %>%
  group_by(sign) %>%
  summarise(
    M = mean(prominence_now, na.rm = TRUE),
    SD = sd(prominence_now, na.rm = TRUE),
    n = n()
  )

prominence_by_sign

# Means and SDs by direction
prominence_by_direction <- data2 %>%
  group_by(direction) %>%
  summarise(
    M = mean(prominence_now, na.rm = TRUE),
    SD = sd(prominence_now, na.rm = TRUE),
    n = n()
  )

prominence_by_direction

#ANOVA of prom now thoughts: loss/gain delay/acc interaction of sign and direction
prominence_model <- aov(prominence_now ~ sign * direction, data = data2)
Anova(prominence_model, type = "III")
# partial chi squared
etaSquared(prominence_model, type = 3)  # type III sums of squares


#1.2.4
#linear regression: prominence of now thoughts by discouting  for gains vs losees
gains <- subset(data2, sign == "gain")
losses <- subset(data2, sign == "loss")
# Gains
gain_model <- lm(k ~ prominence_now, data = gains)
summary(gain_model)
partial_r2_gain <- summary(gain_model)$r.squared
partial_r2_gain

# Losses
loss_model <- lm(k~ prominence_now, data = losses)
summary(loss_model)
partial_r2_loss <- summary(loss_model)$r.squared
partial_r2_loss

#1.2.5 
#Mediation of discounting by prominence of now thoughts
#bootstrapping tests 

#Step 1: X-> Y (total effect)
model_total <- lm(k ~ direction, data = gains)

#Step 2: X -> M
model_mediator <- lm(prominence_now ~ direction, data = gains)

#Step 3: X + M -> Y (direct effect)
model_direct <- lm(k ~ direction + prominence_now, data = gains)

summary(model_total)
summary(model_mediator)
summary(model_direct)


med_gain <- mediate(model_mediator, model_direct, treat = "direction", mediator = "prominence_now", boot = TRUE, sims = 100)
summary(med_gain)
losses <- subset(data2, sign == "loss")

model_total_loss <- lm(k ~ direction, data = losses)
model_mediator_loss <- lm(prominence_now ~ direction, data = losses)
model_direct_loss <- lm(k ~ direction + prominence_now, data = losses)

med_loss <- mediate(model_mediator_loss, model_direct_loss, treat = "direction", mediator = "prominence_now", boot = TRUE, sims = 5000)
summary(med_loss)
